# 5200 Final Project Pie Chart

# Salary Pie Chart

library(tidyverse)
# 2019 salary table, read from a local CSV export.
# Source: https://legacy.baseballprospectus.com/compensation/?team=WAS
salary_2019 <- readr::read_csv(file = "Salary_2019.csv")

# 2023 salary table, read from a local CSV export.
# Source: https://www.spotrac.com/mlb/washington-nationals/payroll/2023/
salary_2023 <- readr::read_csv(file = "Salary_2023.csv")

# Keep only the position and salary columns of each table, then discard every
# row in which either of the two is missing.
salary_2019 <- salary_2019 %>%
  dplyr::select(Pos, Salary) %>%
  na.omit()
salary_2023 <- salary_2023 %>%
  dplyr::select(`POS.`, `BASE SALARY`) %>%
  na.omit()

# Inspect the distinct position codes present in each dataset.
# The values printed when this was last run are kept below as `#>` comments so
# that the script still parses when sourced.
unique(salary_2019$Pos)
#>  [1] "SP"  "3B"  "1B"  "2B"  "RF"  "C"   "RP"  "SS"  "CF"  "LF"  "DNP" "PH"
unique(salary_2023$`POS.`)
#>  [1] "SP"    "RP/CL" "RF"    "1B"    "RP"    "C"     "2B"    "SS"    "LF"
#> [10] "3B"    "OF"    "CF"
# Normalise the 2023 table:
#   * fold the combined "RP/CL" code into plain "RP";
#   * strip "$" and "," from BASE SALARY and convert the result to a number;
#   * keep only the two cleaned columns.
salary_2023 <- salary_2023 %>%
  mutate(
    Pos = ifelse(`POS.` == "RP/CL", "RP", `POS.`),
    Salary_2023 = as.numeric(gsub("[$,]", "", `BASE SALARY`))
  ) %>%
  dplyr::select(Pos, Salary_2023)

# Add a numeric Salary_2019 column: strip "$" and "," from the Salary strings
# and convert what remains to a number.
salary_2019 <- salary_2019 %>%
  mutate(Salary_2019 = as.numeric(gsub("[$,]", "", Salary)))

# 2023 salary summed per position, the overall payroll (repeated on every
# row), and each position's share of that payroll.
# Salaries that failed numeric conversion are NA at this point; they are
# skipped (na.rm = TRUE) so that a single bad cell cannot turn the payroll
# total and every share into NA.
# Note: percentage_2023 is a fraction of 1, not a value out of 100.
salary_2023_1 <- salary_2023 %>%
  group_by(Pos) %>%
  summarize(salary_sum_2023 = sum(Salary_2023, na.rm = TRUE)) %>%
  mutate(
    total_2023 = sum(salary_sum_2023),
    percentage_2023 = salary_sum_2023 / total_2023
  )

# 2019 salary summed per position, the overall payroll (repeated on every
# row), and each position's share of that payroll.
# Salaries that failed numeric conversion are NA at this point; they are
# skipped (na.rm = TRUE) so that a single bad cell cannot turn the payroll
# total and every share into NA.
# Note: percentage_2019 is a fraction of 1, not a value out of 100.
salary_2019_1 <- salary_2019 %>%
  group_by(Pos) %>%
  summarize(salary_sum_2019 = sum(Salary_2019, na.rm = TRUE)) %>%
  mutate(
    total_2019 = sum(salary_sum_2019),
    percentage_2019 = salary_sum_2019 / total_2019
  )
# Keep the four positions with the largest share of the 2019 payroll and fold
# every remaining position into a single "Others" row.
ranked_2019 <- salary_2019_1 %>% arrange(desc(percentage_2019))
top_four_2019 <- ranked_2019 %>% filter(row_number() <= 4)
# Grouping by the constant label (rather than calling summarise() on the
# ungrouped rows) yields zero rows when nothing is left after the top four,
# so no empty "Others" slice is created for small inputs.
others_2019 <- ranked_2019 %>%
  filter(row_number() > 4) %>%
  group_by(Pos = "Others") %>%
  summarise(
    salary_sum_2019 = sum(salary_sum_2019),
    total_2019 = first(total_2019),
    percentage_2019 = sum(percentage_2019)
  )
salary_2019_final <- bind_rows(top_four_2019, others_2019)

# Replace position codes with readable names. The lookup covers every code
# rather than only the ones expected to lead, so a different top four is still
# labelled correctly; codes missing from the lookup (and "Others") are kept
# unchanged.
position_labels <- c(
  "SP" = "Starting Pitcher",
  "RP" = "Relief Pitcher",
  "C"  = "Catcher",
  "1B" = "First Baseman",
  "2B" = "Second Baseman",
  "3B" = "Third Baseman",
  "SS" = "Shortstop",
  "LF" = "Left Fielder",
  "CF" = "Center Fielder",
  "RF" = "Right Fielder",
  "OF" = "Outfielder",
  "PH" = "Pinch Hitter"
)
salary_2019_final <- salary_2019_final %>%
  mutate(Pos = coalesce(unname(position_labels[Pos]), Pos))

# Overall 2019 payroll. total_2019 carries the same figure on every row that
# has one, so averaging the column simply recovers that figure.
salary_total_2019 <- mean(salary_2019_final$total_2019, na.rm = TRUE)
library(plotly)

# Interactive pie chart of 2019 salary by position.
pie_chart_2019 <- plot_ly(
  data = salary_2019_final,
  labels = ~Pos,                    # slice labels: position names
  values = ~salary_sum_2019,        # slice sizes: salary summed per position
  type = "pie",
  textinfo = "percent",             # only the percentage is drawn on a slice
  hoverinfo = "label+value+text",
  # The hover box also shows the overall payroll, with thousands separators.
  hovertext = ~paste(
    "Total Salary in 2019:",
    format(salary_total_2019, big.mark = ",", scientific = FALSE)
  ),
  textposition = "inside",
  marker = list(line = list(color = "#FFFFFF", width = 2)),  # white borders
  # The canvas side is derived from the square root of the payroll, so the
  # chart size follows the payroll size.
  height = sqrt(salary_total_2019) / 30,
  width = sqrt(salary_total_2019) / 30
) %>%
  layout(title = "Nationals Salaries By Positions in 2019")

pie_chart_2019
# Keep the four positions with the largest share of the 2023 payroll and fold
# every remaining position into a single "Others" row.
ranked_2023 <- salary_2023_1 %>% arrange(desc(percentage_2023))
top_four_2023 <- ranked_2023 %>% filter(row_number() <= 4)
# Grouping by the constant label (rather than calling summarise() on the
# ungrouped rows) yields zero rows when nothing is left after the top four,
# so no empty "Others" slice is created for small inputs.
others_2023 <- ranked_2023 %>%
  filter(row_number() > 4) %>%
  group_by(Pos = "Others") %>%
  summarise(
    salary_sum_2023 = sum(salary_sum_2023),
    total_2023 = first(total_2023),
    percentage_2023 = sum(percentage_2023)
  )
salary_2023_final <- bind_rows(top_four_2023, others_2023)

# Replace position codes with readable names. The lookup covers every code
# rather than only the ones expected to lead, so a different top four is still
# labelled correctly; codes missing from the lookup (and "Others") are kept
# unchanged.
position_labels <- c(
  "SP" = "Starting Pitcher",
  "RP" = "Relief Pitcher",
  "C"  = "Catcher",
  "1B" = "First Baseman",
  "2B" = "Second Baseman",
  "3B" = "Third Baseman",
  "SS" = "Shortstop",
  "LF" = "Left Fielder",
  "CF" = "Center Fielder",
  "RF" = "Right Fielder",
  "OF" = "Outfielder",
  "PH" = "Pinch Hitter"
)
salary_2023_final <- salary_2023_final %>%
  mutate(Pos = coalesce(unname(position_labels[Pos]), Pos))

# Overall 2023 payroll. total_2023 carries the same figure on every row that
# has one, so averaging the column simply recovers that figure.
salary_total_2023 <- mean(salary_2023_final$total_2023, na.rm = TRUE)

# Interactive pie chart of 2023 salary by position.
pie_chart_2023 <- plot_ly(
  data = salary_2023_final,
  labels = ~Pos,                    # slice labels: position names
  values = ~salary_sum_2023,        # slice sizes: salary summed per position
  type = "pie",
  textinfo = "percent",             # only the percentage is drawn on a slice
  hoverinfo = "label+value+text",
  # The hover box also shows the overall payroll, with thousands separators.
  # The year is spelled out so the text matches the 2019 chart's wording.
  hovertext = ~paste(
    "Total Salary in 2023:",
    format(salary_total_2023, big.mark = ",", scientific = FALSE)
  ),
  textposition = "inside",
  marker = list(line = list(color = "#FFFFFF", width = 2)),  # white borders
  # The canvas side is derived from the square root of the payroll, so the
  # chart size follows the payroll size.
  height = sqrt(salary_total_2023) / 30,
  width = sqrt(salary_total_2023) / 30
)

# Add the chart title.
pie_chart_2023 <- layout(
  pie_chart_2023,
  title = "Nationals Salaries By Positions in 2023"
)

pie_chart_2023